package com.ccb.textinputformat;

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

import com.google.common.base.Charsets;

/**
 * A {@link TextInputFormat} that hands out record readers using a fixed,
 * hard-coded record delimiter: the control character U+0003 immediately
 * followed by U+0004 (bytes 0x03 0x04 when encoded as UTF-8).
 *
 * <p>The delimiter is not read from the task context in this class; it is
 * always the constant defined below.
 */
public class P6CustomTextInputFormat extends TextInputFormat {

	/** Record terminator: U+0003 followed by U+0004. */
	private static final String RECORD_DELIMITER = "\u0003\u0004";

	/**
	 * Creates a {@link LineRecordReader} configured with the fixed record
	 * delimiter of this input format.
	 *
	 * @param split   the input split to read; not inspected by this method
	 * @param context the task attempt context; not inspected by this method
	 * @return a new {@link LineRecordReader} constructed with the UTF-8
	 *         encoding of the delimiter
	 */
	@Override
	public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
		// Encode on every call so each reader receives its own byte array
		// rather than sharing one mutable array across readers.
		byte[] recordDelimiterBytes = RECORD_DELIMITER.getBytes(StandardCharsets.UTF_8);
		return new LineRecordReader(recordDelimiterBytes);
	}

}
